## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
## Get the current data from the two time-series files (confirmed cases and deaths)
# Build the download URLs for the two COVID-19 time-series files.
# Both files live under the same base URL, so the base is prepended to each name.
url_in <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"
file_names <- c(
  "time_series_covid19_confirmed_global.csv",
  "time_series_covid19_deaths_global.csv"
)
urls <- str_c(url_in, file_names)

# Build the download URL for the population table in the same way.
url_in_pop <- "https://raw.githubusercontent.com/datasets/population-growth-estimates-and-projections/master/data/"
file_name_pop <- "population-constant-fertility.csv"
urls_pop <- str_c(url_in_pop, file_name_pop)
Let’s read in the data and see what we have.
# Download the confirmed-case table, the death table and the population table.
global_cases <- read_csv(urls[[1]])
global_deaths <- read_csv(urls[[2]])
world_pop <- read_csv(urls_pop)

# Convert Population to a double and scale it by 1000.
# Entries that cannot be parsed as numbers become NA (with a coercion warning).
# NOTE(review): presumably the source reports population in thousands - confirm.
world_pop[["Population"]] <- as.double(world_pop[["Population"]]) * 1000
## Warning: 강제형변환에 의해 생성된 NA 입니다
# Turn one wide time-series table (one column per date) into long form.
#
# wide_tbl:   table with `Province/State`, `Country/Region`, Lat, Long and
#             one column per date.
# value_name: name of the column that receives the counts ("cases"/"deaths").
#
# Returns a table with Province_State, Country_Region, date, <value_name>,
# Year (four-digit year as text) and Country_Region2 (a copy of
# Country_Region that is later rewritten to match the population table).
# Rows whose count is not greater than zero are dropped.
reshape_series <- function(wide_tbl, value_name) {
  wide_tbl %>%
    pivot_longer(
      cols = -c(`Province/State`, `Country/Region`, Lat, Long),
      names_to = "date",
      values_to = value_name
    ) %>%
    select(-c(Lat, Long)) %>%
    rename(
      Country_Region = `Country/Region`,
      Province_State = `Province/State`
    ) %>%
    mutate(date = mdy(date)) %>%
    filter(.data[[value_name]] > 0) %>%
    mutate(
      Year = format(date, format = "%Y"),
      Country_Region2 = Country_Region
    )
}

global_cases_pivot <- reshape_series(global_cases, "cases")
global_deaths_pivot <- reshape_series(global_deaths, "deaths")
Some country names differ between the COVID-19 data set and the
population data set.
To merge the two, replace the country names in the COVID-19 data with the names used in the population data.
# Lookup table: country label used in the COVID-19 data -> label used in the
# population table. Labels that are not listed are left unchanged.
#
# Two entries differ from a naive mapping on purpose:
# * "Congo (Kinshasa)" is the Democratic Republic of the Congo, a different
#   country from "Congo (Brazzaville)"; giving both the label "Congo" would
#   divide its counts by the wrong population.
# * "Kosovo" is NOT mapped. It must not borrow the population of
#   "Republic of Korea"; without a match it is dropped when rows lacking
#   population data are removed after the merge.
country_name_map <- c(
  "Korea, North"        = "Dem. People's Republic of Korea",
  "Korea, South"        = "Republic of Korea",
  "Bolivia"             = "Bolivia (Plurinational State of)",
  "Brunei"              = "Brunei Darussalam",
  "Congo (Brazzaville)" = "Congo",
  "Congo (Kinshasa)"    = "Democratic Republic of the Congo",
  "Cote d'Ivoire"       = "Côte d'Ivoire",
  "Iran"                = "Iran (Islamic Republic of)",
  "Laos"                = "Lao People's Democratic Republic",
  "Russia"              = "Russian Federation",
  "Syria"               = "Syrian Arab Republic",
  "Taiwan*"             = "China, Taiwan Province of China",
  "US"                  = "United States of America",
  "Venezuela"           = "Venezuela (Bolivarian Republic of)",
  "Vietnam"             = "Viet Nam"
)

# Translate a character vector of COVID-19 country labels into population-table
# labels; values without an entry in country_name_map pass through untouched.
to_population_name <- function(country) {
  has_entry <- country %in% names(country_name_map)
  country[has_entry] <- unname(country_name_map[country[has_entry]])
  country
}

# Apply the same mapping to both tables so they stay consistent.
global_cases_pivot$Country_Region2 <-
  to_population_name(global_cases_pivot$Country_Region2)
global_deaths_pivot$Country_Region2 <-
  to_population_name(global_deaths_pivot$Country_Region2)
The following countries and entities have no population data:
* Antarctica * Burma * Diamond Princess * Kosovo * Moldova * MS Zaandam *
Tanzania * West Bank and Gaza
# Attach the population of each country/year to the case and death tables.
# all.x = TRUE keeps every COVID-19 row; rows without a matching population
# entry get NA in Population and are removed right afterwards.
covid_keys <- c("Country_Region2", "Year")
pop_keys <- c("Region", "Year")

cases_with_pop <- merge(
  global_cases_pivot, world_pop,
  by.x = covid_keys, by.y = pop_keys, all.x = TRUE
)
deaths_with_pop <- merge(
  global_deaths_pivot, world_pop,
  by.x = covid_keys, by.y = pop_keys, all.x = TRUE
)

global_cases_pivot <- cases_with_pop[!is.na(cases_with_pop$Population), ]
global_deaths_pivot <- deaths_with_pop[!is.na(deaths_with_pop$Population), ]
To find the country that suffered most and the safest country in the
world, we need to compare the numbers of cases and deaths.
However, countries differ in population, and a more populous
country is likely to have more cases and deaths.
So comparing raw case and death counts is not meaningful.
Instead, calculate the average cases and deaths per thousand people to compare
countries.
# Express counts per thousand inhabitants so that countries of different
# size can be compared.
global_cases_pivot$case_per_thousand <-
  global_cases_pivot$cases / global_cases_pivot$Population * 1000
global_deaths_pivot$death_per_thousand <-
  global_deaths_pivot$deaths / global_deaths_pivot$Population * 1000

# Average cases per thousand for each country. Countries whose average is NA
# are dropped, and "Korea, North" is excluded from the case ranking.
average_cases <- global_cases_pivot %>%
  group_by(Country_Region) %>%
  summarise(
    average_case_per_thousand = mean(case_per_thousand),
    .groups = "drop"
  )
average_cases <- average_cases[!is.na(average_cases$average_case_per_thousand), ]
average_cases <- average_cases[average_cases$Country_Region != "Korea, North", ]

# which.max()/which.min() return exactly one row even when several countries
# share the extreme value. Selecting with `== max(...)` would return every
# tied country, and the later `Country_Region == x[[1]]` comparisons would
# then silently recycle a multi-element vector.
worst_case_country <- average_cases[
  which.max(average_cases$average_case_per_thousand), "Country_Region"
]
best_case_country <- average_cases[
  which.min(average_cases$average_case_per_thousand), "Country_Region"
]

# Same computation for deaths (no country is excluded here).
average_deaths <- global_deaths_pivot %>%
  group_by(Country_Region) %>%
  summarise(
    average_death_per_thousand = mean(death_per_thousand),
    .groups = "drop"
  )
average_deaths <- average_deaths[!is.na(average_deaths$average_death_per_thousand), ]

worst_death_country <- average_deaths[
  which.max(average_deaths$average_death_per_thousand), "Country_Region"
]
best_death_country <- average_deaths[
  which.min(average_deaths$average_death_per_thousand), "Country_Region"
]
North Korea has the lowest average cases per thousand people.
But this country does not share reliable information with the outside
world, so this data cannot be trusted and North Korea is excluded.
Among the remaining countries, Micronesia is the safest, with the lowest average cases
per thousand people.
On the other hand, cases in Andorra have been growing continuously,
and its average cases per thousand people is the highest.
# Plot cases per thousand over time for the country with the lowest average.
best_case_name <- best_case_country[[1]]
best_case_pivot <- global_cases_pivot[
  global_cases_pivot$Country_Region == best_case_name,
]
ggplot(data = best_case_pivot, mapping = aes(x = date, y = case_per_thousand)) +
  geom_area(fill = "seagreen", alpha = 0.4) +
  geom_line(color = "seagreen", size = 2) +
  geom_point(size = 3, color = "seagreen") +
  theme_ipsum() +
  labs(
    title = paste("The lowest case country - ", best_case_name),
    y = "Cases per thousand ppl",
    x = "Date"
  )
# Plot cases per thousand over time for the country with the highest average.
worst_case_name <- worst_case_country[[1]]
worst_case_pivot <- global_cases_pivot[
  global_cases_pivot$Country_Region == worst_case_name,
]
ggplot(data = worst_case_pivot, mapping = aes(x = date, y = case_per_thousand)) +
  geom_area(fill = "violetred", alpha = 0.4) +
  geom_line(color = "violetred", size = 2) +
  geom_point(size = 3, color = "violetred") +
  theme_ipsum() +
  labs(
    title = paste("The highest case country - ", worst_case_name),
    y = "Cases per thousand ppl",
    x = "Date"
  )
Although China is known as the origin of COVID-19, China has the
lowest average deaths per thousand people.
It turns out that Peru is the country that suffered most in the world.
# Plot deaths per thousand over time for the country with the lowest average.
best_death_name <- best_death_country[[1]]
best_death_pivot <- global_deaths_pivot[
  global_deaths_pivot$Country_Region == best_death_name,
]
ggplot(data = best_death_pivot, mapping = aes(x = date, y = death_per_thousand)) +
  geom_area(fill = "seagreen", alpha = 0.4) +
  geom_line(color = "seagreen", size = 2) +
  geom_point(size = 3, color = "seagreen") +
  theme_ipsum() +
  labs(
    title = paste("The lowest death country - ", best_death_name),
    y = "Deaths per thousand ppl",
    x = "Date"
  )